*    ---------------------------------------------------
*                                                       
*                 ______________  _________ 
*                / ___/_  __/   |/_  __/   |
*                /__ / / / / /| | / / / /| |
*               ___/ // / / ___ |/ / / ___ |
*              /____//_/ /_/  |_/_/ /_/  |_|
*               
*                                                       
*    ---------------------------------------------------
*               
*           ====
*           课件：https://gitee.com/arlionn/graph
*           ====
*                                                       
*                                                       
*                主讲人：连玉君                   
*                                                       
*        单  位：中山大学岭南学院金融系      
*        电  邮: arlionn@163.com                        
*        
*        主  页: https://www.lianxh.cn (最新推文都在这里, 百度：连享会)
*        知  乎: https://www.zhihu.com/people/arlionn/
*        微  博：http://weibo.com/arlionn 
*        b   站：https://space.bilibili.com/546535876 (b站搜索：连享会)

*        微  信：lianyj45                               
*        公众号：连享会 ( ID: lianxh_cn )               		  
 

    
		   
*             ============================
*                :: 实证分析可视化 ::
*             ============================

	
*-注意：执行后续命令之前，请先执行如下几条命令

* =课件放置=：
  global FF "D:/"  //盘符
  global foldName "Lianxh_Graph" // 设置文件夹名称
  
  
*-以下都是相对路径，无需修改
  cd "$FF"
  cap mkdir "$foldName"
  cd "$foldName"
  cap mkdir fig 
  cap mkdir data 
  
  global path "$FF/$foldName"   //课件目录
  global data "$path/data"
  global fig  "$path/fig"

  adopath +   "$PP/adofiles"    //外部命令 
  
  cd "$fig"   // 工作路径


//-----------------
//安装和设置绘图模板 (若已安装, 可以忽略)
/*
  graph query, schemes // 列示电脑中已经安装的图形模板

. set scheme s2color   // Stata 默认, 彩色
. set scheme s2mono    // Stata 默认, 黑白

  net install scheme_scientific.pkg, replace
. set scheme scientific // 黑白图，很不错

  ssc install schemepack, replace  //white_tableau 模板
. set scheme white_tableau //设定绘图风格为white_tableau，R 图形风格  
*/ 


version 17

/*
 == 实证分析可视化 ==
 
    https://gitee.com/arlionn/PX

    o 为什要可视化？
    o Stata 绘图命令的架构
    o 直方图与密度函数图：histogram, kdensity, biplot
    o 分仓散点图：binscatter，binscatter2
    o 系数及系数差异的可视化呈现：coefplot
    o 调节效应、倒 U 型关系及边际效应的可视化
    o 面板数据、多个控制变量、高维固定效应模型的可视化
    o 长期与短期关系的可视化
    o 范文：2 篇
*/

/*
  set scheme white_tableau 
  set scheme tufte
*/



  *------------ 执行如下命令可以查看绘图详情 --------------
  
        . ssc install lianxh, replace 

        . lianxh 绘图 
        
        . lianxh 可视化
        
        . lianxh 散点 直方 柱
        
  *----------------------------------------------------------      






*------------------
*-3.1 为什要可视化？
*------------------

*~~~~~~~~~
*-Why 01 ？发现数据特征和问题
*       Anscombe (1973)

clear
input x      y1    y2    y3   x4    y4 
     10.0  8.04  9.14  7.46  8.0  6.58 
      8.0  6.95  8.14  6.77  8.0  5.76 
     13.0  7.58  8.74 12.74  8.0  7.71 
      9.0  8.81  8.77  7.11  8.0  8.84 
     11.0  8.33  9.26  7.81  8.0  8.47 
     14.0  9.96  8.10  8.84  8.0  7.04 
      6.0  7.24  6.13  6.08  8.0  5.25 
      4.0  4.26  3.10  5.39 19.0 12.50 
     12.0 10.84  9.13  8.15  8.0  5.56 
      7.0  4.82  7.26  6.42  8.0  7.91 
      5.0  5.68  4.74  5.73  8.0  6.89
end 
label data "Anscombe (1973), The American Statistician, 27(1): 17-21, Table 1"
save "$data/Anscombe1973", replace 

global opt "legend(off) lc(green*2)"
tw scatter y1 x  || lfit y1 x , $opt xtitle("Fig 1")
graph save "Anscombe1973_1_temp.gph", replace 
tw scatter y2 x  || lfit y2 x , $opt xtitle("Fig 2")
graph save "Anscombe1973_2_temp.gph", replace
tw scatter y3 x  || lfit y3 x , $opt xtitle("Fig 3")
graph save "Anscombe1973_3_temp.gph", replace
tw scatter y4 x4 || lfit y4 x4, $opt xtitle("Fig 4")
graph save "Anscombe1973_4_temp.gph", replace

*-combine
graph combine "Anscombe1973_1_temp.gph" "Anscombe1973_2_temp.gph" ///
              "Anscombe1973_3_temp.gph" "Anscombe1973_4_temp.gph", xcommon ycommon
graph export "$fig/Anscombe1973_Figs.png", replace width(1200)


*~~~~~~~~~
*-Why 02 ？模拟分析，理解计量理论

*---------  
*-例1: OLS 估计的无偏性

* 从 population 中随机抽样，虽然每次的估计结果有真实值都有差异，
* 但如果做 K=1000 次，他们的均值应该非常接近真实值
*
*    E[b_ols] = b_0
* 
* 1. MC 生成 N=100000 的样本，视为 population
* 2. 随机抽取 n=50 的子样本， 视为 sample，计算 b 和 se
* 3. 把第二步重复做 K=1000 次，系数记为 b_j, 计算 Mean(b_j), 并于 b0 对比
*
*    预期:  Mean(b_j) = b_0

clear 
set seed 1357
set obs 100000

gen x = rnormal(0,1)
gen y = 10 + 0.5*x + rnormal()
gen b = .
gen b_se = .
local  n = 50       // 每次抽样的样本数
global K = 1000     // 模拟次数
gen id = _n in 1/$K // 样本序号
forvalues j =1/$K {
   preserve
     qui sample `n', count
     qui reg y x 
   restore
   qui replace    b = _b[x]  in `j'
   qui replace b_se = _se[x] in `j'
   dis "." _c
}

*-图示系数估计值
  kdensity b, xline(0.5, lp(dash) lc(red) noextend)
  graph export "OLS_unbias_b_density.png", width(1200) replace

  scatter b id
  
  *----------详细版本----------------begin------
  sum b, detail
  local b_U_CI90 = r(p95) // 90% CI 下限
  local b_L_CI90 = r(p5)  // 90% CI 上限
  scatter b id, ///
      mcolor(black*0.4%80) msize(*0.6) ///
      yline(0.5, lp(dash) lc(red) noextend) ///
      yline(`b_U_CI90' `b_L_CI90', lc(blue) lw(*1.5)) ///
      ylabel(0.1(0.2)0.9, format(%2.1f))
  *-----------------------------------over-------  
  graph export "OLS_unbias_b.png", width(1200) replace   
  
*-图示标准误
  kdensity b_se 
  
*-图示 t 值
  cap drop t
  gen t = b/b_se
  kdensity t, xline(1.96, lp(dash) lc(red) ///
     noextend) normal ///
     legend(order(1 "b_j den" 2 "Normal den") ring(0) pos(3)) ///
     title(" ")
  graph export "OLS_t_value_kden.png", width(1200) replace   
  
  count if (t>1.96)&(t!=.)
  dis "reject-ratio = " 1-r(N)/$K // should be 0.05
  
*-Q: 
*
*  [1] 把 local n=100 改为 30, 500 等数值，b 和 se 的结果有何变化？
*
*  [2] n=50 时, t-value 服从正态分布吗？ n=10000 呢？  
  
  
*---------  
*-例2: OLS 估计的一致性

* 随着样本数的增大，OLS 估计值趋向于真实值，方差趋近于 0
* 1. MC 生成 N=100000 的样本，视为 population
* 2. 随机抽取 n=10,20,...1000, 1100,...,30000 的子样本，计算 b 和 se
* 3. 绘图
clear 
set seed 13579
set obs 100000
gen x = rnormal(0,1)
gen y = 10 + 0.5*x + rnormal()
local j = 1
gen n = .
gen b = .
gen b_se = .
foreach i of numlist 10(10)1000 1100(100)30000{
   preserve
     qui sample `i', count
     qui reg y x 
   restore
   qui replace    n = e(N)   in `j'
   qui replace    b = _b[x]  in `j'
   qui replace b_se = _se[x] in `j++'
   dis "." _c
}

*-图示系数估计值
  line b    n , xscale(log) xlabel(10 50 100 500 1000 5000 10000 50000) 
  
*-图示标准误
  line b_se n , xscale(log) xlabel(10 50 100 500 1000 5000 10000 50000)

*-图示系数估计值+标准误
  
  tw (line b n) (line b_se n), ///
     yline(0.5, lp(dash)) ylabel(0(0.1)0.8) ///
     xscale(log) xlabel(10 50 100 400 1000 5000 30000) ///
     legend(ring(0) pos(2)) ///
     scheme(tufte)
     
  graph export "OLS_consis_b_se.png", replace    
     


*~~~~~~~~~
*-Why 03 ？ 做有灵魂的图形    
     
*------------------
*-例 3: 何谓 Fixed Effect?

  *-产生模拟数据
  local het = 0
  if `het'!=1{       // b1=b2=b3
     local b1 = 0.4
     local b2 = 0.4
     local b3 = 0.4
     local title "homo"
  }
  else{              // beta_j
     local b1 = 0.4
     local b2 = `b1'+0.2
     local b3 = `b1'+0.4
     local title "het"
  }
  
  clear
  set obs 60
  set seed 13599
  
  egen id= seq(), from(1) to(3) block(20)
  bysort id : gen t = _n + 1990
  
  gen x1 = 3*rnormal() 
  gen  e = 1*rnormal()
  
  gen y = .

  gen     x = x1 + 0
  replace x = x1 + 4 if 1.id
  replace x = x1 - 5 if 3.id
  
  replace y = 6  + `b1'*x + e if id==1
  replace y = 10 + `b2'*x + e if id==2
  replace y = 15 + `b3'*x + e if id==3
  
  bysort id: center y x, prefix(c_)  // De-mean
  
  save "$data/sim_panel_`het'", replace 
  
  *-绘图 1：手动处理
  #delimit ;
    twoway (scatter y x     if id==1, mcolor(green) msymbol(+)) 
           (scatter y x     if id==2, mc(red)  ms(oh))
           (scatter y x     if id==3, mc(blue) ms(dh))
           (lfit    y x,    lcolor(black) lw(*1.5) lp(dash))    
           (lfit    y x     if id==1, lc(gray) lw(*1))
           (lfit    y x     if id==2, lc(gray) lw(*1))
           (lfit    y x     if id==3, lc(gray) lw(*1))
           (scatter c_y c_x if id==1, mcolor(green) msymbol(+)) 
           (scatter c_y c_x if id==2, mc(red)  ms(oh))
           (scatter c_y c_x if id==3, mc(blue) ms(dh))
           (lfit    c_y c_x,lcolor(black) lw(*1.8) lp(dash))    
           (lfit    c_y c_x if id==1, lc(gray) lw(*1))
           (lfit    c_y c_x if id==2, lc(gray) lw(*1))
           (lfit    c_y c_x if id==3, lc(gray) lw(*1))
           , 
           yline(5, lp(longdash) lc(blue*1.5%30))
           ylabel(,angle(0))
           text(15.5 9.4 "Raw") text( 4 8.8   "De-mean")
           legend(off) aspect(0.8) ;
  #delimit cr
  graph export "FWL_demean_`title'.png", replace width(1200)

  
  *-绘图 2：使用 binscatter
    *use "$data/sim_panel_1.dta", clear 
/*
    set scheme white_tableau
*/
    binscatter y x, by(id)               // Raw
    
    binscatter y x, by(id) control(i.id) // De-mean
  
  *-合图/美化
    binscatter y x, by(id)  ///
        aspect(0.9) legend(off) ///
        savegraph(sim_FE_raw.gph) replace           
    
    binscatter y x, by(id) control(i.id) noaddmean  ///
        aspect(0.9) legend(off) ///
        savegraph(sim_FE_demean.gph) replace
  
    graph combine sim_FE_raw.gph sim_FE_demean.gph, ///
         col(2) imargin(small) 
    
    graph export "sim_FE_demean_binscatter.png", width(1200) replace 
  
  
  
  
*========================
*-3.2 Stata 绘图命令概览
*========================

*----------------------------
*-3.2.1 二维图命令的基本结构

  *-整体架构 
    
	* twoway (单元图1) (单元图2) (...)  , 选项1 选项2 ...
    
	* twoway  单元图1 || 单元图2 || ... , 选项1 选项2 ...
     
  *-单元图的定义
    
	* (单元图类型 y1 y2 ... x , 选项1 选项2 ...)
    * e.g.
    * (scatter price weight, msize(small) msymbol(oh))
     
  *-二维图选项的定义
    
	* 二维图选项标题 (定义内容 , 子选项 子选项 ...)

  *-图形的整体风格：由 scheme 来控制
/*  
    help   scheme 
    graph query, schemes     // 已安装的图形模板
    findit scheme 
    set scheme s1mono        // Stata 官方模板, 黑白
    set scheme scientific    // 用户模板, 黑白
    set scheme white_tableau // 用户模板, 彩色
 */
   
  *-图形输出和保存
  *  graph save   xxx.gph, replace // Stata 格式 .gph，便于后续 combine
  *  graph export xxx.png, replace // .png 格式，便于插入 word, md, LaTeX
   
*---------   
*-示例-G1：第一幅图

  sysuse "auto.dta", clear
  set scheme s1mono
  
  #d ;
  twoway
    (scatter mpg weight, msize(small) msymbol(oh))
    (lfit    mpg weight, lcolor(blue) 
                         lwidth(*1.3) 
                         lpattern(dash))
    ,
    title("My first Stata graph")
    note("www.lianxh.cn") ;
  graph export "graph_fig001.png", 
        width(800) replace ;
  #d cr 
   
  *-Note: #d; 用于声明后续命令使用「;」作为换行符，而非默认的回车键换行
  /*
     help delimit
  */

*---------   
*-示例-G2

   sysuse "sp500", clear
   replace volume = volume/1000
   keep in 1/57
  *----------------------------------------Begin
  #delimit ;
   twoway                        //help twoway
      (rspike hi low date, lw(*1.3))
      (line   close  date, 
         lpattern(solid) lwidth(*1.2) lcolor(blue))
     , 
     yscale(range(1100 1400))    //help axis_options
     ylabel(1100(100)1400, grid) //help axis_options
     ymtick(##5)                 //help axis_options
     xlabel(, angle(30))         //help axis_options
     ytitle("股价", place(top))  //help title_options
     subtitle("S&P 500", margin(b+2.5)) //help title_options
                                        //help graph text    
     xtitle("交易日")            //help title_options
     legend(order(1 "High-Low" 2 "Close") 
            ring(0) position(2) row(2)) //help legend_options
     note("数据来源: 雅虎财经！")      //help title_options
     scheme(s2mono);                    //help scheme 
  #delimit cr
  *----------------------------------------Over


  /* 酌情选用
   ssc install schemepack, replace // 安装模板
   set scheme white_tableau        // 指定模板
   
   set scheme scientific  // 适于投稿
  */

*---------   
*-示例-G3

  sysuse "sp500", clear
  replace volume = volume/1000
  
  *----------------------------------------Begin
  #delimit ;
   twoway 
      (rspike hi low date)
      (line   close  date)
      (bar    volume date, barw(.25) yaxis(2))
       in 1/57
     , 
     yscale(axis(1) r(900 1400))
     yscale(axis(2) r(  9   45))
     ylabel(, axis(2) grid)
     ytitle("股价: 最高, 最低, 收盘",place(top))
     ytitle("交易量 (百万股)", axis(2) 
             bexpand just(left))
     xtitle(" ")
     legend(off)
     subtitle("S&P 500", margin(b+2.5))
     note("数据来源: 雅虎财经！") ;
  #delimit cr
  *----------------------------------------Over
  graph export "sp500_rspike.png", replace


*----------------
*-3.2.2 绘图模板  

/*
    help   scheme 
    
    graph query, schemes     // 已安装的图形模板
    * 手动设置：编辑(E) → 首选项(P) → 图形首选项(G) → 【方案】下拉菜单
    
    findit scheme   //搜索新模板
*/

  sysuse "auto.dta", clear
  
  set scheme s1mono
  scatter price mpg, by(foreign)
  
  set scheme tufte
  scatter price mpg, by(foreign)
  
  set scheme white_tableau
  scatter price mpg, by(foreign)
  
  set scheme scientific
  scatter price mpg, by(foreign)
  
  scatter price mpg, by(foreign) scheme(s2color)
  
  *-自定制模板
   
   . lianxh 模板 + 图 // 执行该命令，查看推文
   
   * Stata：图形美颜-自定义绘图模板-grstyle-palettes: 
     view browse "https://www.lianxh.cn/news/8c1819ff61a8a.html"
   
   help grstyle   // Benn, J, SJ 18(3):491--502；SJ 18(4):786–802 
   view browse "https://www.stata-journal.com/article.html?article=gr0073"
   view browse "https://www.stata-journal.com/article.html?article=gr0073_1"
   
   *-安装
   ssc install  grstyle, replace 
   ssc install palettes, replace
   
   *-范例
   set scheme scientific
   grstyle init
   grstyle set plain, horizontal grid
   scatter price mpg, by(foreign) 
  
 
 
*===================================
*-3.3 直方图、分布函数和密度函数图
*===================================

*--------------
*-3.3.1 直方图

   help histogram
   
    *-概览
      sysuse nlsw88.dta, clear
      
      histogram wage
      gen ln_wage = ln(wage)
      histogram ln_wage         // 对数转换后往往更符合正态分布
        
    *-图形的纵坐标
      histogram wage            // 长条的高度对应样本数占总样本的比例，
	                            // 总面积为 1
      histogram wage, fraction  // 将长条的高度总和限制为 1
      histogram wage, frequency // 纵坐标为对应的样本数，而非比例

      
    *-其他选项
      sysuse nlsw88.dta, clear
      histogram ttl_exp, normal  // 附加正态分布曲线
      histogram wage, kdensity   // 附加密度函数曲线
      histogram wage, addlabels  // 每个长条上方附加一个表示其高度的数字
      
      histogram wage, by(race)   // 分组绘制

      
    *-离散变量的直方图
      histogram grade
      histogram grade, discrete  // 离散变量的直方图必须附加 discrete 选项
	  
	*-长条的显示-长条的横向间距
	  histogram wage, gap(50)
	  histogram wage, gap(90) scheme(s1mono)	  
	  histogram wage, gap(99.9) scheme(s1mono) blwidth(thick)
      
    *-分组绘制直方图
      sysuse "auto.dta", clear
      histogram mpg, percent discrete                  ///
          by(foreign, col(1) note(分组指标：汽车产地) ///
                      title("图3：不同产地汽车油耗")  ///
                      subtitle("auto.dta") )           ///
          ytitle(百分比) xtitle(每加仑英里数) 
    
    
*------------------
*-3.3.2 密度函数图

   *-Kernal 密度函数图
     
	 help kdensity 
	 
	 sysuse "nlsw88.dta", clear
	 kdensity wage
	 kdensity wage, normal
     
   *-比较不同子样本的密度函数
     sysuse "nlsw88.dta", clear
     gen lnwage = ln(wage)
     twoway (kdensity lnwage if 1.race)   ///
            (kdensity lnwage if 2.race)   ///
            (kdensity lnwage if race!=3), ///
            legend(order (1 "White" 2 "Black" 3 "All") ///
                   ring(0) pos(3) col(1))
     graph export "$fig/Fig-visual-mkdensity-01.png", replace
     
   *-使用 mkdensity 命令 (便于测试) 
     sysuse "nlsw88.dta", clear
     gen lnwage = ln(wage)
     global y   "ttl_exp"
     global y   "wage"
     global y   "lnwage"
     global gg  "union"
     global gg  "race"
     global gg  "collgrad"
     
     mkdensity $y, over($gg)


   *-附加置信区间    -akdensity- 外部命令 SJ 3(2):148--156
     
     help akdensity
     
     sysuse "nlsw88.dta", clear
     gen lnwage = ln(wage)
     akdensity lnwage, stdbands(2)     
     
   *-多变量密度函数图合图
     help multidensity
    
     sysuse "auto.dta", clear
     multidensity clear
     multidensity gen price weight mpg length
     multidensity juxta, combineopts(name(G7, replace))
     multidensity bystyle
   
     
*----------------------
*-3.3.3 累积分布函数图
      
  help cumul
  help distplot
  help cdfplot
    
  *-基本概念	
/*
    webuse hsng, clear
    save "$D/hsng.dta", replace 
*/
    use "$D/hsng.dta", clear
    cumul faminc, gen(cum)
       sort faminc
       list faminc cum, clean
       list faminc cum if _n<5 | _n>46, clean
/*
                faminc   cum  
         1.   14591.00   .02  
         2.   14641.00   .04  
         3.   15993.00   .06  
         4.   16167.00   .08  
               ... ...
        47.   22906.00   .94  
        48.   23112.00   .96  
        49.   23149.00   .98  
        50.   28395.00     1 
*/

    line cum faminc, sort

  *-对比：South v.s. other  
    cap drop cum_south cum_other 
    cumul faminc if region==3, gen(cum_south)
    cumul faminc if region!=3, gen(cum_other)
    
    *-累积分布图
    tw (line cum_south faminc)  ///
       (line cum_other faminc), ///
       legend(order (1 "South" 2 "Other") ring(0) pos(3) col(1))

    *-密度函数图
    tw (kdensity faminc if region==3)  ///
       (kdensity faminc if region!=3), ///
       legend(order (1 "South" 2 "Other") ring(0) pos(3) col(1))       
 
 
  *--------  
  *--displot-  (外部命令)更为简洁
    
	help distplot
	
   /*
    webuse hsng, clear
   */
    use "$D/hsng.dta", clear    

    distplot scatter faminc
    gen south = 3.region
    distplot line faminc, by(south)
	distplot connected faminc, trscale(ln(@)) //对数转换

    *-支持的图形种类
	* area bar connected dot dropline line scatter spike

      foreach t in area bar connected dot dropline line scatter spike {
        distplot `t' faminc, by(south)
      }

  *--------  
  *-cdfplot- 命令 

    help cdfplot 

	webuse hsng, clear
    gen south = 3.region
    cdfplot faminc, normal
    cdfplot faminc, by(south)
    cdfplot faminc, by(south) norm 
   
    *-示例：对数转换的作用
      sysuse "nlsw88.dta", clear
	  cdfplot wage, normal
	  gen ln_wage = ln(wage)
	  cdfplot ln_wage, normal
      
   *-Furthur reading:
   *  Cox, N., 2004, 
   *    Speaking stata: Graphing distributions, 
   *    STATA JOURNAL, 4(1): 66-88.   
   view browse "https://journals.sagepub.com/doi/pdf/10.1177/1536867X0100400106"
   *  Cox, N., 2004, 
   *    Speaking Stata: Graphing categorical and compositional data, 
   *    STATA JOURNAL, 4(1): 190-215.
   view browse "https://journals.sagepub.com/doi/pdf/10.1177/1536867X0400400209" 

   
   
*------------------
*-3.4 散点图
*------------------
  
  help scatter
  help multiline
  help binscatter
  help binscatter2
  help superscatter
  help binscatterhist
  help crossplot

*---------------
*-3.4.1 基本用法
  
*-原始数据的散点图  
  sysuse "auto.dta", clear
  twoway scatter mpg weight
  sc mpg weight // 简写
  
*-散点图矩阵
  help graph matrix 

  sysuse "auto.dta", clear
  local cx "weight length turn foreign"
  graph matrix mpg `cx', scheme(tufte)
 
  *-使用 multiline 命令
    sysuse "auto.dta", clear

    multiline mpg weight length displacement price, ///
          recast(scatter) c(none) by(col(2)) ms(Oh) ///
          subtitle(, orient(vertical)) 

    multiline mpg weight length displacement price, ///
       recast(scatter) c(none) by(col(2)) ms(Oh)    ///
       subtitle(, orient(vertical)) ///
       sepby(foreign) ms(Oh +)  
  
  
*- f(y) v.s. x  
  help crossplot
  
  sysuse "auto.dta", clear
  
  gen rt_mpg = sqrt(mpg)
  gen ln_mpg = ln(mpg)
  gen rec_mpg = 100/mpg
  
  crossplot (mpg rt_mpg ln_mpg rec_mpg) weight, combine(imargin(small))
 
  sysuse "nlsw88.dta", clear
  gen lnwage = ln(wage)
  crossplot (wage lnwage) (hours ttl_exp), combine(imargin(small))

  
*----------------------
*-3.4.2 分组均值散点图   

  sysuse "nlsw88.dta", clear
  scatter wage hours  // 看得出规律 ?
  
  *-分组均值
  global gg "industry"    // 行业
  global gg "occupation"  // 职业
  
  bysort $gg: egen mean_wage  = mean(wage)
  bysort $gg: egen mean_hours = mean(hours)
  bysort $gg: egen num = count(id)
  
  *-行业均值散点图
    scatter mean_wage mean_hours
    
  *-设定权重
    scatter mean_wage mean_hours [aw=num]
  
  *-添加标签  
    *-----------------------------------------Begin-----------
      scatter mean_wage mean_hours [aw=num]
      addplot: scatter mean_wage mean_hours [aw=num], ///
               mlabel($gg) mlabcolor(black) mlabposition(12) ///
               msize(*0.2) mcolor(white)  ///
               legend(off)
    *-----------------------------------------Over------------
    *-Notes: 
    * 1. 有没有离群值/特殊值 ？
    * 2. 如何进一步美化 ?
    
/*
    help scatter

    options                  Description
    --------------------------------------------------------------------
    marker_options           change look of markers (color, size, etc.)
    marker_label_options     add marker labels; change look or position
    connect_options          change look of lines or connecting method

    composite_style_option   overall style of the plot

    jitter_options           jitter marker positions using random noise

    axis_choice_options      associate plot with alternate axis

    twoway_options           titles, legends, axes, added lines and text,
                             by, regions, name, aspect ratio, etc.
    -------------------------------------------------------------------- 
*/
    
    
*------------------
*-3.4.4 分仓散点图 

  *-基本原理: E[ y | X=x, Controls ]  
  *
  * 基础: FWL 定理 (详见讲义和 Slides)
    
  webuse "nlswork.dta", clear	
  
  gen wage = exp(ln_wage)
  
  binscatter wage year, by(union)                   // left
  
  binscatter wage year, by(union) absorb(union)     // middle
  
  binscatter wage year, by(union) absorb(union) /// // right
             controls(i.race msp collgrad ttl_exp hours) msymbols(+ oh)
      
   
    
*---------------------------
*-3.4.5 分仓散点图 + 直方图 

  help binscatterhist

  sysuse "nlsw88.dta", clear  
  gen lnwage = ln(wage)

  global yx "wage tenure"
  binscatterhist $yx, histogram($yx) 
  binscatterhist $yx, histogram($yx) linetype(qfit)
  
  global yx "lnwage tenure"
  binscatterhist $yx, histogram($yx) ymin(2.3)
  binscatterhist $yx, histogram($yx) ymin(2.3) linetype(qfit)   
  graph export "$fig/binscatterhist_eg2.png", replace
  
  global yx "lnwage tenure"
  global cx "i.race i.industry ttl_exp collgrad"
  binscatterhist $yx, histogram($yx) controls($cx) n(100) 
  reg $yx $cx 
  esttab, nogap
  
  *-更多实例
  doedit "$do/binscatterhist_example.do"





*--------------------------
*-3.5 系数差异的可视化呈现：coefplot
*--------------------------


. sysuse "nlsw88.dta", clear
. gen agesq = age*age
*-分组虚拟变量
. drop if race==3
. gen black = 2.race
. tab black 
*-删除缺漏值 
. global xx "ttl_exp married south hours tenure age* i.industry"
. reg wage $xx i.race
. keep if e(sample)   
*-分组回归
. global xx "ttl_exp married south hours tenure age* i.industry"
. reg wage $xx if black==0 
. est store White
. reg wage $xx if black==1 
. est store Black
 *-结果对比
. local m "White Black"
. esttab `m', mtitle(`m') b(%6.3f) nogap drop(*.industry) ///
	 s(N r2_a) star(* 0.1 ** 0.05 *** 0.01) 

Table1: 白人组和黑人组工资影响因素差异对比

------------Table 1-------------------
                (1)             (2)   
              White           Black   
--------------------------------------
ttl_exp       0.251***     0.269***
             (6.47)          (4.77)   
married      -0.737**      0.091   
            (-2.31)          (0.23)   
... (output ommited) ...
--------------------------------------
N          1615.000         572.000   
r2_a          0.112           0.165   
--------------------------------------
t statistics in parentheses
* p<0.1, ** p<0.05, *** p<0.01

----------------------------------------
             White         Black        
----------------------------------------
 ttl_exp 
---------
   beta     0.251***    0.269***  
  95% CI  [0.17, 0.33]   [0.16, 0.38]   
----------------------------------------
 married 
---------
   beta     -0.737**      0.091      
  95% CI  [-1.36, -0.11]  [-0.69, 0.87] 
----------------------------------------


. global xline "xline(0,lp(dash) lc(red*0.5))"
. coefplot White Black, keep(ttl_exp married) ///
           nolabels $xline ciopt(recast(rcap))
. graph export "Fig01.png", replace

*-Also see: multicoefplot
  


*------------------------
*-图形呈现系数和置信区间
  
  *-Jann, B. "Plotting Regression Coefficients and Other Estimates". 
  * STATA JOURNAL, 2014, 14(4):708-737.
  shellout "$R\Jann_2014_coefplot.pdf"

  help coefplot
  
    sysuse "auto.dta", clear
    regress price mpg headroom trunk length turn
    coefplot, drop(_cons) xline(0)
    
	regress price mpg headroom trunk length turn if foreign==0
    estimates store domestic
    regress price mpg headroom trunk length turn if foreign==1
    estimates store foreign
    coefplot domestic foreign, drop(_cons) xline(0)

  *-详情参见：连享会 推文
    *-Stata可视化：让他看懂我的结果！
	view browse "https://www.lianxh.cn/news/01607de7be5e8.html"

   
        
*------------------------------------
*- 附：A3_graph 绘图模板 - 外部模板
*------------------------------------

*-----------------------------------------Begin-----------
  cap program drop myDraw
  program define myDraw
  version 17
  args figName schemeName
  twoway (function y1=cos(x),range(0 10))   ///
         (function y2=sin(x),range(0 10))   ///
         (function y3=x^(1/3),range(0 10)), ///
          title("scheme `schemeName' ")     ///
          legend(rows(1))                   ///
          saving("`figName'",replace)       ///
          nodraw
  end 
*-----------------------------------------Over------------
*-Notes: 
* 1. 选中，按快捷键 Ctrl+R，读入内存
* 2. 调用：
     myDraw b1 lean1


cap graph drop "b1" "b2" "b3" "b4" "b5" "b6"
local sch "lean1"
set scheme `sch'
myDraw  b1 `sch'

local sch "white_tableau"
set scheme `sch'
myDraw  b2 `sch'

local sch "scientific"
set scheme `sch'
myDraw  b3 `sch'


local sch "tufte"
set scheme `sch'
myDraw  b4 `sch'


local sch "rbn1mono"
set scheme `sch'
myDraw  b5 `sch'


local sch "cleanplots"
set scheme `sch'
myDraw  b6 `sch'
              
graph combine "b1" "b2" "b3" ///
              "b4" "b5" "b6", ///
              rows(3) saving(gr_scheme2)
          
graph export "gr_scheme2.png", width(1200) replace           


        